This document was created by Amanda Welch, M.S.

All data was collected on site between May 22 and May 24, 2024 by sampling vehicles upon entry. A total of 941 surveys were collected. Box Office recorded 2,328 tickets used for entry.

Data was input by volunteers into Qualtrics and raw numbers were downloaded for analysis.

Analysis and visualization done in R and RStudio. RStudio Team (2022). RStudio: Integrated Development Environment for R. RStudio, PBC, Boston, MA URL http://www.rstudio.com/.

For questions or comments: or

knitr::opts_chunk$set(echo = TRUE)
options(warn = -1)
library(ggplot2)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(RColorBrewer)
library(scales)
library(ggrepel)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ purrr::discard()    masks scales::discard()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# set working directory
# setwd("C:/Users/Amanda Welch/Documents/GitHub/SOAK-Census") #on Amanda's
setwd("/Users/airtracy/Documents/GitHub/SOAK-Census") #on Tracy's
# import data
# On Amanda's:
# data <- read.csv("C:/Users/Amanda Welch/Desktop/SOAK 2023/census_data.csv")

# On Tracy's:
# 2023 Census data:
 data <- read.csv("/Users/airtracy/Documents/SOAK-Census_copy/census_data.csv") 

# convert column names to lower case
data <- clean_names(data)

#remove 1st 2 rows with unnecessary info
data <- data[-(1:2),]
# On Amanda's:
#data_2024 <- read.csv("C:/Users/Amanda Welch/Desktop/SOAK 2024/SOAK 2024_RawData.csv")

# On Tracy's:
# 2024 Census data:
data_2024 <- read.csv("/Users/airtracy/Documents/SOAK-Census_copy/SOAK\ 2024_RawData.csv")
data_2024 <- clean_names(data_2024)
data_2024 <- data_2024[-(1:2),]
data_2024 <- data_2024[, !names(data_2024) %in%
               c("start_date","end_date","status","ip_address","progress","duration_in_seconds",
                 "finished","recorded_date","response_id","recipient_last_name",
                 "recipient_first_name", "recipient_email","external_reference","location_latitude",
                 "location_longitude","distribution_channel","user_language","q_recaptcha_score")]
# rename columns for 2024
data_2024 <- data_2024 %>% 
  dplyr::rename(
    attend = q1,
    age = q2,
    race = q3,
    born = q4,
    language = q5,
    gender = q6,
    orientation = q7,
    relationship = q8,
    religion = q9,
    politics = q10,
    edu = q11,
    income = q12,
    zip_code = q13,
    arrive = q14,
    rv = q15,
    ticket = q16,
    move = q17,
    participate = q18,
    sustain = q19,
    townhall = q22,
    bm = q23,
    info = q24,
    ada = q25,
    military = q26,
    kids = q27,
    teens = q28,
    time_arrival = q29
    
  )
# drop irrelevant columns generated by data entry 
data <- data[, !names(data) %in%
               c("start_date","end_date","status","ip_address","progress","duration_in_seconds",
                 "finished","recorded_date","response_id","recipient_last_name",
                 "recipient_first_name", "recipient_email","external_reference","location_latitude",
                 "location_longitude","distribution_channel","user_language","q_recaptcha_score")]

# rename columns
data <- data %>% 
  dplyr::rename(
    attend = q1,
    age = q2,
    race = q3,
    born = q4,
    language = q5,
    gender = q6,
    orientation = q7,
    relationship = q8,
    religion = q9,
    politics = q10,
    edu = q11,
    income = q12,
    zip_code = q13,
    arrive = q14,
    rv = q15,
    ticket = q16,
    move = q17,
    participate = q18,
    sustain = q19,
    kids = q20,
    teens = q21,
    townhall = q22,
    bm = q23,
    info = q24
  )

# re-writing this entry - entered with "it's my 1st year" as well which can't be true if they also attended 3 years prior
data$attend[328] <- "2022,2019,2018"

Attendance

# rows (sometimes) store several year
# pull them out and separate years so they can be counted individually
attend_list <- strsplit(data$attend, ",")
attend_unlist <- unlist(attend_list)
occurrences <- table(attend_unlist)
occurrences_df <- as.data.frame(occurrences)

# get attendance count
# counting using commas - 0 commas per row means only have attended once, 1 comma = twice, etc.
# exclude virgins - will be its own count
attend_count <- str_count(data$attend[data$attend != "It's my first year!" &
                                        data$attend != "Omitted"], ",")
virgins <- sum(data$attend == "It's my first year!")

count_0 <- sum(attend_count == 0)
count_1_2 <- sum(attend_count %in% c(1, 2))
count_3_6 <- sum(attend_count %in% c(3, 4, 5, 6))
count_7_or_more <- sum(attend_count >= 7)

# create a new data frame
attend_count_df <- data.frame(attendance = c("0","1", "2-3", "4-7", "8 or more"), 
                              count = c(virgins, count_0, count_1_2, count_3_6, count_7_or_more))
attend_count_df$percent <- attend_count_df$count / sum(attend_count_df$count) * 100
attend_count_df$percent_round <- round(attend_count_df$percent, digits = 1)
#replace blanks with Omitted
data_2024[data_2024 == ""] <- "Omitted"
# rows (sometimes) store several year
# pull them out and separate years so they can be counted individually
attend_list_24 <- strsplit(data_2024$attend, ",")
attend_unlist_24 <- unlist(attend_list_24)
occurrences_24 <- table(attend_unlist_24)
occurrences_df_24 <- as.data.frame(occurrences_24)

# get attendance count
# counting using commas - 0 commas per row means only have attended once, 1 comma = twice, etc.
# exclude virgins - will be its own count
attend_count_24 <- str_count(data_2024$attend[data_2024$attend != "It's my first year!" &
                                        data_2024$attend != "Omitted"], ",")
virgins_24 <- sum(data_2024$attend == "It's my first year!")

count_0_ <- sum(attend_count_24 == 0)
count_1_2_ <- sum(attend_count_24 %in% c(1, 2))
count_3_6_ <- sum(attend_count_24 %in% c(3, 4, 5, 6))
count_7_or_more_ <- sum(attend_count_24 >= 7)

# create a new data frame
attend_count_df_24 <- data.frame(attendance_24 = c("0","1", "2-3", "4-7", "8 or more"), 
                              count_24 = c(virgins_24, count_0_, count_1_2_, count_3_6_, count_7_or_more_))
attend_count_df_24$percent_24 <- attend_count_df_24$count_24 / sum(attend_count_df_24$count_24) * 100
attend_count_df_24$percent_round_24 <- round(attend_count_df_24$percent, digits = 1)
attend_count_df <- cbind(attend_count_df, attend_count_df_24)
attend_count_df <- subset(attend_count_df, select = -attendance_24)
rm(attend_count_df_24)
# plot which years were reported attended
# occurrences_df %>% 
#   ggplot(aes(x = attend_unlist,
#              y = Freq)) +
#   geom_bar(stat = "identity",
#            color = "black",
#            fill = "turquoise") +
#   labs(x = "Year attended",
#        y = "Number of participants",
#        title = "\nHow many participants were at which years") +
#   theme_minimal() +
#   theme(axis.text.x = element_text(angle = 45, hjust = 1))
# plot which years were reported attended
occurrences_df_24 %>% 
  ggplot(aes(x = attend_unlist_24,
             y = Freq)) +
  geom_bar(stat = "identity",
           color = "black",
           fill = "turquoise") +
  labs(x = "Year attended",
       y = "Number of participants",
       title = "\nHow many participants were at which years") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

A total of 376 participants (40.6% of participants) reported 2024 as their first time attending SOAK, which is a decrease from 2023 which had 43.2% of the population report themselves as first timers.

# plot how many years attended
attend_count_df %>%
  ggplot(aes(x = attendance, y = count_24, fill = count_24)) +
  geom_bar(stat = "identity", color = "black", alpha = 0.8) +
  labs(x = "Number of trips to SOAK",
       y = "Number of participants",
       title = "\n\n\nHow many times participants have previously attended SOAK",
       caption = "Plot excludes omitted responses") +
  geom_text(aes(label = count_24), 
            position = position_stack(vjust = 0.5), 
            size = 3.5, color = "black") +
  guides(fill = FALSE) + #remove legend
  theme_minimal()

# create variable of participants used instead of hard coding value in chart
participants <- sum(attend_count_df$count_24)

# maths for pie chart
pie_labels <- attend_count_df %>% 
  mutate(csum = rev(cumsum(rev(percent_round_24))),
         pos = percent_round_24/2 + lead(csum, 1),
         pos = if_else(is.na(pos), percent_round_24/2, pos))

# the plot
attend_count_df %>% 
  ggplot(aes(x = "", y = percent_round_24, fill = fct_inorder(attendance))) +
  geom_col(width = 1, color = 1) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Pastel2") +
  geom_label_repel(data = pie_labels,
                   aes(y = pos, label = paste0(percent_round_24, "%")),
                   size = 4.5, nudge_x = 1, show.legend = FALSE) +
  guides(fill = guide_legend(title = "Previous years attended")) +
  ggtitle("\n\n\nAttendance history") +
  labs(caption = paste(participants, "participants, excluding omitted responses \n")) +
  theme_void()

# Reshape the data into long format
attend_long <- attend_count_df %>%
  pivot_longer(cols = c("percent", "percent_24"), 
               names_to = "year", 
               values_to = "percent") %>%
  mutate(year = recode(year, "percent" = "2023", "percent_24" = "2024"))
# Create spaghetti plot
attend_long %>% 
  ggplot(aes(x = year, y = percent, group = attendance, color = attendance)) +
  geom_line(size = 1) +
  geom_point(size = 3) +  # Optional, to emphasize points
  labs(title = "Attendance change over time",
       x = "Year",
       y = "Percentage",
       color = "How many times \nattended before 2024") +
  theme_minimal()

Previous years attended Count Percent
0 376 40.6
1 219 23.6
2-3 184 19.8
4-7 112 12.1
8 or more 36 3.9

The bar chart and pie chart above are 2 different visualizations of the same information. The bar chart shows us the count of how many times people have attended SOAK before, whereas the pie chart is the percent of each group.The line plot demonstrates the changes in each attendance category over one year.

According to the Burning Man Census Report, the population had 28.3% first-time burners in 2022, with a huge increase to 43.9% first-timers in 2023 (Shev, A.B., Morency A., and the 2023 Census Lab) in comparison to SOAK’s 43.2% fist-time SOAKer in 2022 and a slight decrease to 40.6% in 2023.

Sociodemographic Characteristics

Age and Gender

# create df counting the number of participants in each age group
age_counts <- table(data$age)
age <- as.data.frame(age_counts)
age$Var1 <- factor(age$Var1, levels = c("Under 13", "13-17", "18-24",
                                        "25-34", "35-44", "45-54", "55-64",
                                        "Over 65", "Omitted"))


# gender identity is tricky, as some people chose multiple identities
# for those who selected multiple, save new variable that replaces multiple choices with the single 
# answer of "multiple identities"
# plotting all responses would be difficult to visualize well
data$gender_updated <- ifelse(grepl(",", data$gender), "Multiple identities", data$gender)
age_gender_counts <- table(data$age, data$gender_updated)
age_gender_counts_df <- as.data.frame(age_gender_counts)
colnames(age_gender_counts_df) <- c("age", "gender", "count")

# drop counts of 0 and omitted responses
age_gender_filtered_df <- age_gender_counts_df[age_gender_counts_df$count != 0, ]
age_gender_filtered_df <- age_gender_filtered_df[age_gender_filtered_df$age != "Omitted", ]
age_gender_filtered_df <- age_gender_filtered_df[age_gender_filtered_df$gender != "Omitted", ]

# change under 13 to 12 and below for ordering on the graph
age_gender_filtered_df$age <- gsub("Under 13", "0-12", age_gender_filtered_df$age)
# plot ages and gender
age %>% 
  ggplot(aes(x = Var1,
             y = Freq,
             fill = Var1)) +
  geom_bar(stat = "identity",
           color = "black") +
  scale_fill_brewer(name = "Age group counts",
                    palette = "RdYlGn",
                    labels = c("7","7","12","238",
                               "260", "97", "26", "15", "8")) +
  labs(x = "Age range",
       y = "Number of participants",
       title = "\nAge of participants") +
  theme_minimal() 

# table
age$Percent <- round((age$Freq / sum(age$Freq) * 100), digits = 1)
colnames(age)[1] <- "Age group"
colnames(age)[2] <- "Count"

knitr::kable(age)
Age group Count Percent
13-17 7 1.0
18-24 12 1.8
25-34 238 35.5
35-44 260 38.8
45-54 97 14.5
55-64 26 3.9
Omitted 8 1.2
Over 65 15 2.2
Under 13 7 1.0

The vast majority (74.3%) of SOAK participants range from age 25 to 44. Ages 25-34 and 35-44 are similarly reported, with 45-54 being the 3rd most common age group at 14.5% of participants. All other age groups make up 9.9% of the population.

This age data tracks with the 2022 Burning Man median age of arrival. The median age was 37 years (Shev, A.B., Morency A., and the 2022 Census Lab.).

genders <- table(data$gender_updated)
genders <- as.data.frame(genders)
genders$percent <- round((genders$Freq / sum(genders$Freq) * 100), digits = 1)

# maths for pie chart
gender_pie_labels <- genders %>% 
  mutate(csum = rev(cumsum(rev(percent))),
         pos = percent/2 + lead(csum, 1),
         pos = if_else(is.na(pos), percent/2, pos))

# the plot
genders %>% 
  ggplot(aes(x = "", y = percent, fill = fct_inorder(Var1))) +
  geom_col(width = 1, color = 1) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Pastel2") +
  geom_label_repel(data = gender_pie_labels,
                   aes(y = pos, label = paste0(percent, "%")),
                   size = 4.5, nudge_x = 1, show.legend = FALSE) +
  guides(fill = guide_legend(title = "Gender")) +
  ggtitle("\n\n\nGender identity") +
  labs(caption = paste(sum(genders$Freq), "participants, including omitted responses")) +
  theme_void()

# gender x age
age_gender_filtered_df %>% 
  ggplot(aes(x = age,
             y = count,
             fill = reorder(gender, -count))) +
  geom_bar(stat = "identity",
           color = "black",
           position = "dodge") +
  scale_fill_manual(values = c("gray0", "darkorchid2", "green",
                               "cyan2", "firebrick2", "honeydew",
                               "tan2")) +
  labs(x = "Age range",
       y = "Number of participants",
       title = "\n\n\nGender identity grouped by age",
       caption = "Plot excludes omitted repsonses from either category",
       fill = "Gender") +
  theme_minimal()

# renaming columns
new_names <- c("Gender identity", "Count", "Percent")
colnames(genders) <- new_names

knitr::kable(genders)
Gender identity Count Percent
Gender fluid 12 1.8
Man 301 44.9
Multiple identities 27 4.0
Non-binary / third gender 43 6.4
Omitted 2 0.3
Other 9 1.3
Unsure/Questioning 4 0.6
Woman 272 40.6

Men comprised the most commonly reported gender at SOAK (44.9%), followed by women at 40.6%. Together, this totaled 85.5% of the population. Non-binary, multiple identities, and gender fluid were the next commonly reported gender identities at 6.4%, 4%, and 1.8% respectively.

When grouped by age, we see the most diversity in gender reporting in ages 25-44. These were the predominant age groups at SOAK, so a broader capture in gender would be expected since the sample size of these ages were the largest.

In contrast to the Burning Man Census report, in 2022, 1.1% of the population reported Non-binary. The Burning Man Census report included transgender identities, cisgender identities, and male/female identities. The state of Oregon has collected data on gender identity in 11th grade youth, reporting 5.5% non-binary gender in 2019 (oregon dot gov ^ 1). Statistics on adults is inconsistent and not well reported.

The SOAK Census survey offered multiple options for gender to attempt to capture the most accurate details of the population.

Income and Education

# replace blank cells with omitted in these categories
data$edu[data$edu == ""] <- "Omitted"
data$income[data$income ==""] <- "Omitted"

edu <- table(data$edu)
edu <- as.data.frame(edu)
colnames(edu) <- c("edu", "count")
edu$percent <- round((edu$count / sum(edu$count) * 100), digits = 1)

# order for the plot
desired_edu_order <- c("Some K-12","High school (diploma or GED)","Some college","Associates",
                       "Vocational/trade","Bachelors", "Masters", "Doctoral","None","Omitted")

# convert the "edu" column to a factor with the desired order
edu$edu <- factor(edu$edu, levels = desired_edu_order)

income <- table(data$income)
income <- as.data.frame(income)
colnames(income) <- c("income", "count")
income$percent <- round((income$count / sum(income$count) * 100), digits = 1)

desired_income_order <- c("Less than $12,880","$12,880 - 24,999","$25,000 - 49,999",
                          "$50,000 - 74,999","$75,000 - 99,999","$100,000 - 149,999", 
                          "$150,000 - 199,999", "Above $200,000","Omitted")

# Convert the "edu" column to a factor with the desired order
income$income <- factor(income$income, levels = desired_income_order)
colnames(edu) <- sapply(colnames(edu), function(x) paste(toupper(substr(x, 1, 1)), 
                                                           substr(x, 2, nchar(x)), sep = ""))
edu %>% 
  ggplot(aes(x = Edu, y = Percent)) +
  geom_bar(stat = "identity", color = "blue", fill = "black") +
  labs(x = "Level of education completed",
       y = "SOAK participants in 2023 (%)",
       title = "\n\nHighest level of education completed") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_discrete(labels = c("Some K-12","High school","Some college","Associates",
                       "Vocational/trade","Bachelors", "Masters", "Doctoral",
                       "None","Omitted"))

knitr::kable(edu)
Edu Count Percent
Associates 44 6.6
Bachelors 288 43.0
Doctoral 42 6.3
High school (diploma or GED) 32 4.8
Masters 123 18.4
None 1 0.1
Omitted 10 1.5
Some college 94 14.0
Some K-12 15 2.2
Vocational/trade 21 3.1

The highest level of education most reported by SOAK participants was a Bachelor’s degree at 43%. This is less than 52% of Burning Man participants in 2022, but more than the state of Oregon which reports 35% of the population but includes both Bachelor’s or advanced degree in their reporting (oregon dot gov ^ 2). This Oregon data is from 2019, thus likely does not represent current educational achievements.

colnames(income) <- sapply(colnames(income), function(x) paste(toupper(substr(x, 1, 1)), 
                                                           substr(x, 2, nchar(x)), sep = ""))

income %>% 
  ggplot(aes(x = Income, y = Percent)) +
  geom_bar(stat = "identity", color = "black", fill = "darkgreen", alpha = 0.7) +
  labs(x = "Income",
       y = "SOAK participants in 2023 (%)",
       title = "\n\nPersonal income over the last 12 months") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

income_sorted <- income %>% 
  arrange(Income)

knitr::kable(income_sorted)
Income Count Percent
Less than $12,880 51 7.6
$12,880 - 24,999 76 11.3
$25,000 - 49,999 116 17.3
$50,000 - 74,999 120 17.9
$75,000 - 99,999 81 12.1
$100,000 - 149,999 106 15.8
$150,000 - 199,999 43 6.4
Above $200,000 40 6.0
Omitted 37 5.5

Personal income is distributed into categories fairly evenly, with the least frequently reported values below $12,880 and over $150,000, with the most amount of participants reporting the $50,000-$74,999 range at 17.9%.

Burning Man’s most commonly reported income in 2022 was $100,00-$149,999 at 17.8%. Considering that most Burners arrive from Southern California, a discrepancy in income would be expected due to differing costs of living. The median Oregon household income in 2022 was $76,632.

Residence

library(zipcodeR)
data("zip_code_db")

# manually add information for ZIP code 97003
new_info <- data.frame(zipcode = "97003", major_city = "Beaverton", state = "OR",
                       lat = 45.51, lng = -122.88)

# add the new information to zip_code_db
zip_code_db <- bind_rows(zip_code_db, new_info)

data$zip_code[data$zip_code == ""] <- NA
#52 rows omitted as NA

zip_codes <- table(data$zip_code)
zip_codes <- as.data.frame(zip_codes)
colnames(zip_codes) <- c("zipcode", "count")

zip_geos <- geocode_zip(data$zip_code)
zips_merged <- left_join(zip_codes, zip_geos, by = "zipcode")


# function to get city and state information for a given ZIP code
get_city_state_for_zip <- function(zipcode) {
  zip_info <- zip_code_db[zip_code_db$zipcode == as.character(zipcode), c("major_city", "state")]
  city <- ifelse(length(zip_info$major_city) > 0, zip_info$major_city, NA)
  state <- ifelse(length(zip_info$state) > 0, zip_info$state, NA)
  return(data.frame(City = city, State = state))
}

# apply the function to each unique zip code in zips_merged
zips_merged <- zips_merged %>%
  rowwise() %>%
  do(get_city_state_for_zip(.$zipcode)) %>%
  bind_cols(zips_merged, .)

# lat and lng of cities entered by df

zips_merged$City[9] <- "Duisburg"
zips_merged$State[9] <- "Germany"

zips_merged$lat[16] <- 33.21
zips_merged$lng[16] <- -97.13

zips_merged$City[30] <- "Anjoutey"
zips_merged$State[30] <- "France"

zips_merged$lat[70] <- 45.49
zips_merged$lng[70] <- -122.88

zips_merged$City[98] <- "Portland"
zips_merged$State[98] <- "OR"

zips_merged$City[108] <- "Portland"
zips_merged$State[108] <- "OR"

zips_merged$City[113] <- "Salem"
zips_merged$State[113] <- "OR"

zips_merged$lat[123] <- 43.4
zips_merged$lng[123] <- -124.03

zips_merged$City[125] <- "Schweinfurt"
zips_merged$State[125] <- "Germany"

zips_merged$lat[133] <- 44.06
zips_merged$lng[133] <- -121.34

zips_merged$City[141] <- "Seattle"
zips_merged$State[141] <- "WA"

zips_merged$lat[142] <- 47.68
zips_merged$lng[142] <- -122.21

zips_merged$City[193] <- "Sacramento"
zips_merged$State[193] <- "CA"
zips_merged_clean <- na.omit(zips_merged)
usa_map <- map_data("state")

ggplot() +
  geom_polygon(data = usa_map, aes(x = long, y = lat, group = group), 
               fill = "white", color = "gray") +
  geom_point(data = zips_merged_clean, aes(x = lng, y = lat, size = count, color = count), 
             show.legend = TRUE) +
  scale_size_continuous(range = c(1, 10)) +
  scale_color_gradient(low = "blue", high = "red") +  # Set the color scale
  labs(title = "Zip code density map of SOAK attendees in the United States") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  guides(size = "none") +
  labs(color = "Zip code frequency") +
  coord_fixed(ratio = 1.5)

state_counts <- zips_merged %>%
  group_by(State) %>%
  summarize(Count = sum(count))

state_counts <- state_counts  %>% 
  arrange(desc(Count))

state_counts <- na.omit(state_counts)
state_counts$Percent <- round((state_counts$Count / sum(state_counts$Count) * 100), digits = 1)

knitr::kable(state_counts, row.names = FALSE)
State Count Percent
OR 434 70.6
WA 118 19.2
CA 28 4.6
ID 11 1.8
NY 4 0.7
CO 2 0.3
Germany 2 0.3
MA 2 0.3
MO 2 0.3
MT 2 0.3
NV 2 0.3
France 1 0.2
GA 1 0.2
IA 1 0.2
LA 1 0.2
ME 1 0.2
NC 1 0.2
TX 1 0.2
UT 1 0.2

Most SOAK participants reside in Oregon, followed by Washington then California. We had visitors from all across the country, including New York, Montana, Georgia, and many others. In addition to visitors from out of state, we had a total of 3 participants report foreign postal codes: 2 from Germany and 1 from France!

city_counts <- zips_merged %>% 
  group_by(City) %>% 
  summarize(Count = sum(count))

city_counts <- city_counts %>% 
  arrange(desc(Count))

city_counts <- na.omit(city_counts)
city_counts$Percent <-  round((city_counts$Count / sum(city_counts$Count) * 100), digits = 1)

knitr::kable(head(city_counts, 10))
City Count Percent
Portland 307 49.9
Seattle 64 10.4
Eugene 42 6.8
Vancouver 16 2.6
Corvallis 10 1.6
Springfield 9 1.5
Beaverton 8 1.3
Bend 7 1.1
Hillsboro 7 1.1
Salem 7 1.1

When looking at specific cities, the top 10 reported were in Oregon and Washington, with Portland accounting for almost half (49.9%) of all responses.

Ethnicity and Language

#For race, multiracial was an option and some participants selected multiple races. Replace those entries with multiracial

data$race_cleaned <- gsub(".*\\,.*", "Multiracial", data$race)

#replace entry of human with omitted
data$race_cleaned[data$race_cleaned == ""] <- "Omitted"
data$race_cleaned <- gsub("Human", "Omitted", data$race_cleaned)

for (i in 1:length(data$race_cleaned)) {
  if (grepl(":)", data$race_cleaned[i], fixed = TRUE)) {
    data$race_cleaned[i] <- "Omitted"
  }
}
race <- table(data$race_cleaned)
race <- as.data.frame(race)

colnames(race) <- c("race", "count")
race$percent <- round((race$count / sum(race$count) * 100), digits = 1)
race <- race %>% arrange(desc(percent))
race$race <- factor(race$race, levels = race$race)

# Create the bar plot with reordered data
race %>%
  ggplot(aes(x = reorder(race, -percent), y = percent)) +
  geom_bar(stat = "identity", color = "black", fill = "pink") +
  labs(
    x = "Ethnicity",
    y = "SOAK participants in 2023 (%)",
    title = "\n\nEthnoracial background"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# plotting table of values
colnames(race) <- sapply(colnames(race), function(x) paste(toupper(substr(x, 1, 1)), 
                                                           substr(x, 2, nchar(x)), sep = ""))

knitr::kable(race)
Race Count Percent
White or Caucasian 518 77.3
Multiracial 68 10.1
Hispanic or Latinx 23 3.4
Asian 20 3.0
Other 11 1.6
Middle Eastern or North African 10 1.5
Black or African American 9 1.3
Omitted 8 1.2
Native Hawaiian or Pacific Islander 2 0.3
Native American or Native Alaskan 1 0.1

White was the vast majority of SOAK participants at 77.3%, followed by multiracial at 10.1%. Hispanic was the 3rd most reported race at 3.4%.

SOAK had a similar to (but less than) white population than Burning Man’s 80.5%. Hispanic was the 2nd most common race at Burning Man with 9.6%. Multiracial was categorized as Other in the Burning Man Census, coming in at 3.3%.

According to Data USA, Oregon’s non-Hispanic white population in 2021 was 74.1%. Multiracial was categorized by non-Hispanic and Hispanic at 4.73% and 3%, respectively. Asian was 4.38% compared to SOAK’s 3%. For full Oregon statistics see: https://datausa.io/profile/geo/oregon#demographics

birth <- table(data$born)
birth <- as.data.frame(birth)

colnames(birth) <- c("response", "count")

birth$percent <- round((birth$count / sum(birth$count) * 100), digits = 1)
#maths for pie chart
pie_labels_birth <- birth %>% 
  mutate(csum = rev(cumsum(rev(percent))),
         pos = percent/2 + lead(csum, 1),
         pos = if_else(is.na(pos), percent/2, pos))

#the plot
birth %>% 
  ggplot(aes(x = "", y = percent, fill = fct_inorder(response))) +
  geom_col(width = 1, color = 1) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Pastel2") +
  geom_label_repel(data = pie_labels_birth,
                   aes(y = pos, label = paste0(percent, "%")),
                   size = 4.5, nudge_x = 1, show.legend = FALSE) +
  guides(fill = guide_legend(title = "Question: Born in the US?")) +
  ggtitle("\n\n\nPercent of participants born in the US") +
  labs(caption = paste(sum(birth$count), "participants, including omitted responses \n")) +
  theme_void()

Most participants were born in the USA. In Oregon in 2021, the foreign-born population was 9.8% compared to SOAK’s 9.1% foreign-born population (Data USA).

# create df only containing language data
language_df <- data %>%
  select(language, q5_12_text)

# remove omitted response
language_df <- language_df %>%
  filter(language != "Omitted")

language_df <- language_df %>% 
  na.omit(language_df)

# replace other entry with the language that was entered manually
language_df <- language_df %>%
  mutate(language = str_replace(language, "Other", q5_12_text))

# drop 2nd column
language_df <- language_df %>%
  select(-ncol(language_df))

# Count rows where language is "English" only
english_only_count <- sum(grepl("^English$", language_df$language, ignore.case = TRUE))

# Count rows where language contains "English" and other languages
english_with_other_count <- sum(grepl("English", language_df$language, ignore.case = TRUE) & !grepl("^English$", language_df$language, ignore.case = TRUE))

# Count rows where language does not contain "English"
non_english_count <- sum(!grepl("English", language_df$language, ignore.case = TRUE))

# Calculate percentages of languages spoken in relation to English
language_percent <- c(non_english_count/nrow(language_df)*100,
                      english_with_other_count/nrow(language_df)*100,
                      english_only_count/nrow(language_df)*100)

language_percent <- round(language_percent, 1)
language_percent_names <- c("Non-English", "English and 1+ language", "English")

                      
# Create the data frame
language_percent_df <- data.frame(Language = language_percent_names, Percentage = language_percent)
# Plot
language_percent_df %>% 
  ggplot(aes(x = Language, y = Percentage, fill = Language)) +
  geom_bar(stat = "identity", position = "stack", alpha = 0.9, color = "black") +
  geom_text(aes(label = sprintf("%.1f%%", Percentage)), 
            position = position_stack(vjust = 0.5), 
            size = 3, color = "black") +
  labs(title = "Language use by partcipants", x = "Language", y = "Percentage") +
  theme_minimal()

language_df_counts <- language_df %>%
  separate_rows(language, sep = ",") %>%
  group_by(language) %>%
  count()

# capitalize languages
language_df_counts <- language_df_counts %>%
  mutate(language = str_to_title(language))

colnames(language_df_counts) <- c("Language", "Count")

knitr::kable(language_df_counts, caption = "Languages spoken by participants")
Languages spoken by participants
Language Count
Arabic 1
Chinese 3
English 660
Farsi 1
French 4
German 3
Hebrew 1
Korean 2
Russian 4
Spanish 5
Ukrainian 3
Urdu 1
Bahasa 1
Kree 1

English was the primary language spoken by SOAK participants, with 3.9% of participants fluent in another language. Spanish was the second most commonly reported language.

Spirituality and Politics

# create df only containing rel and pol data
religion_politics <- data %>%
  select(religion, q9_8_text, politics, q10_8_text)

# for religion, many entries of other with specific religion listed instead of religious
# survey did not ask specifically for theistic vs non-theistic religion, simply religious
# replace specified spiritualities claimed as spiritual, otherwise religious 
religion_politics <- religion_politics %>%
  mutate(religion = case_when(
    str_detect(q9_8_text, "Druidic|Hoodoo") ~ "Spiritual",
    TRUE ~ str_replace(religion, "Other", "Religious")
  ))

religion_politics$religion[str_detect(religion_politics$religion, ",")] <- "Multiple responses"

# capitalize political parties
religion_politics <- religion_politics %>%
  mutate(q10_8_text = str_to_title(q10_8_text))

# replace other entry with the political that was entered manually
religion_politics <- religion_politics %>%
  mutate(politics = str_replace(politics, "Other", q10_8_text))

religion_politics$politics[religion_politics$politics == ""] <- "Omitted"

# replace all occurrences of "Not Sure" with "Unsure"
religion_politics$politics <- gsub("Not Sure", "Unsure", religion_politics$politics)

# cleaning up manual entries to closest match, I think
religion_politics$politics <- sub("Anarchy", "Anarchist", religion_politics$politics)
religion_politics$politics <- gsub("Communist.*?\\(Voting Dem\\)", "Communist", religion_politics$politics)
religion_politics$politics <- sub("Dem Soc", "Democratic Socialist", religion_politics$politics)
religion_politics$politics <- sub("No Party Affiliation", "None", religion_politics$politics)
religion_politics$politics <- sub("They All Suck.", "None", religion_politics$politics)
religion_politics$politics <- sub("Moderate. Progun. Prochoice", "Unsure", religion_politics$politics)
religion_politics$politics <- sub("Open-Minded", "Unsure", religion_politics$politics)
religion_politics$politics <- sub("Unsure Just Be Good To Others", "Unsure"
                                  , religion_politics$politics)
religion_politics$politics <- sub("Actual Liberal", "Democrat", religion_politics$politics)
religion_politics$politics <- sub("So Far Left", "Democrat", religion_politics$politics)
religion_politics$politics <- sub("As Left As I Can Vote", "Democrat", religion_politics$politics)
religion_politics$politics <- sub("Anti-Authoritarian Left", "Libertarian Socialist", religion_politics$politics)
religion_politics$politics <- gsub('I"M A Kid', "None", religion_politics$politics)

# make tedious slightly less tedious
values_to_replace <- c("Space", "Men", "Angry", "Angry Leftist", "Annoyed",
                       "Fuck The Gov", "Fuck The Gov And Their Politics", "Human",
                       "Apolitical", "Me", "IM A Kid"
                       )
replacement_value <- "None"
religion_politics$politics <- gsub(paste(values_to_replace, collapse = "|"), 
                                   replacement_value, religion_politics$politics)


religion_politics$politics[str_detect(religion_politics$politics, ",")] <- "Multiple responses"
politics_counts <- table(religion_politics$politics)
religion_counts <- table(religion_politics$religion)

# calculate percentages
politics_percentages <- round(prop.table(politics_counts) * 100, 1)
religion_percentages <- round(prop.table(religion_counts) * 100, 1)

politics_df <- data.frame(
  Politics = names(politics_percentages),
  Counts_Politics = as.numeric(politics_counts),
  Percentage_Politics = politics_percentages
)

religion_df <- data.frame(
  Religion = names(religion_percentages),
  Counts_Religion = as.numeric(religion_counts),
  Percentage_Religion = religion_percentages
)

colnames(religion_df)[colnames(religion_df) == "Percentage_Religion.Freq"] <- "Percent"
colnames(religion_df)[colnames(religion_df) == "Counts_Religion"] <- "Count"
religion_df <- subset(religion_df, select = -Percentage_Religion.Var1)

colnames(politics_df)[colnames(politics_df) == "Percentage_Politics.Freq"] <- "Percent"
colnames(politics_df)[colnames(politics_df) == "Counts_Politics"] <- "Count"
politics_df <- subset(politics_df, select = -Percentage_Politics.Var1)
politics_df$Politics <- factor(politics_df$Politics, 
                               levels = politics_df$Politics[order(politics_df$Percent, decreasing = TRUE)])

politics_df %>% 
  ggplot(aes(x = Politics, y = Percent)) +
  geom_bar(stat = "identity", color = "black", fill = "lavender") +
  labs(
    x = "Political party",
    y = "SOAK participants in 2023 (%)",
    title = "\nPolitical party affiliation"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

politics_df <- politics_df %>% 
  arrange(desc(Count))

knitr::kable(politics_df)
Politics Count Percent
Democrat 296 44.2
None 150 22.4
Progressive 71 10.6
Independent 48 7.2
Multiple responses 41 6.1
Green 20 3.0
Omitted 15 2.2
Libertarian 9 1.3
Unsure 5 0.7
Anarchist 4 0.6
Communist 3 0.4
Democratic Socialist 2 0.3
Republican 2 0.3
Libertarian Socialist 1 0.1
Radical 1 0.1
Socialist 1 0.1
Working Families 1 0.1

Democrat was the most common political party affiliation at 44.2%, followed by None, then Progressive. Compared to the Burning Man population, SOAK had less Democrats (54.4% at Burning Man), less None (34.1% at BM), and Progressive was not categorized in the Burning Man Census. The Burning Man Census report listed 4 major parties, compared to SOAK which lists 16 political ideologies as offered on the survey or filled in by the participant.

religion_df %>%
  ggplot(aes(x = Religion, y = Percent, fill = Religion)) +
  geom_bar(stat = "identity", color = "black", alpha = 0.8) +
  scale_fill_brewer(palette = "GnBu") +  
  labs(
    x = "Religion",
    y = "SOAK participants in 2023 (%)",
    title = "\nReligious or spiritual identity"
  ) +
  geom_text(aes(label = sprintf("%.1f%%", Percent)), 
            position = position_stack(vjust = 0.5), 
            size = 3.5, color = "black") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

religion_df <- religion_df  %>% 
  arrange(desc(Percent))

knitr::kable(religion_df)
Religion Count Percent
Spiritual 224 33.4
Atheist 112 16.7
None 96 14.3
Agnostic 94 14.0
Multiple responses 52 7.8
Unsure 45 6.7
Religious 36 5.4
Omitted 9 1.3
Deist 2 0.3

When it comes to religious or spiritual identity, most participants report being spiritual at 33.4%. This is followed by atheism (16.7%), none (14.3%), then agnostic (14.0%). In 2022, 47.7% of Burning Man participants reported being spiritual and 23.2% reported being atheist. Interestingly, both SOAK and Burning Man participants reported being religious at the same rate of 5.4%.

Sexuality and Relationships

# create df only containing orientation and relationship data
ori_rel <- data %>%
  select(orientation, q7_7_text, relationship, q8_6_text)

ori_rel$q7_7_text[ori_rel$q7_7_text == "Demi"] <- "Demisexual"

# replace other entry with the what was entered manually
ori_rel <- ori_rel %>%
  mutate(orientation = str_replace(orientation, "Other:", q7_7_text))

#clean up manual entries to make some sense
ori_rel$orientation <- sub("mostly straight", "Heteroflexible", ori_rel$orientation)
ori_rel$orientation <- sub("Decline", "Omitted", ori_rel$orientation)
ori_rel$orientation <- sub("North", "Omitted", ori_rel$orientation)
ori_rel$orientation <- sub("Sexual", "Omitted", ori_rel$orientation)
ori_rel$orientation <- sub("unlabled", "Omitted", ori_rel$orientation)
ori_rel$orientation <- sub("Straight,het-flex", "Heteroflexible", ori_rel$orientation)
ori_rel$orientation <- sub("Straight,Open", "Heteroflexible", ori_rel$orientation)
ori_rel$orientation <- sub("heteroflexible", "Heteroflexible", ori_rel$orientation)
ori_rel$orientation <- sub("Straight,Until proven otherwise", "Heteroflexible", ori_rel$orientation)
ori_rel$orientation <- sub("Bisexual or pansexual,Pansexual", "Bisexual or pansexual",
                           ori_rel$orientation)
ori_rel$orientation[ori_rel$orientation == ""] <- "Omitted"
ori_rel$orientation <- sub("demisexual", "Demisexual", ori_rel$orientation)
ori_rel$orientation[ori_rel$orientation == "sexual"] <- "Omitted"


# replace other entry with the what was entered manually
ori_rel <- ori_rel %>%
  mutate(relationship = str_replace(relationship, "Other", q8_6_text))

#clean up manual entries
ori_rel$relationship[ori_rel$relationship == "undecided"] <- "Unsure/Questioning"
ori_rel$relationship <- sub("Unsure/Questioning,thinking about more than monogamy for the first time ever",
                            "Unsure/Questioning", ori_rel$relationship)

#more efficient than sub but not going to re-code previous chunk
ori_rel$relationship <- gsub("undecided|Experimenting|Divorced & Wondering","Unsure/Questioning", 
                             ori_rel$relationship)
ori_rel$relationship <- gsub("solo|single/dating|Single|I'm a kid|Queer AF", "Omitted", 
                             ori_rel$relationship)
ori_rel$relationship <- gsub("relationship anarchy|relationship anarchist|Relationship Anardchist"
                             , "Relationship anarchist", ori_rel$relationship)
ori_rel$relationship <- sub("ENM", "Non-monogamous", ori_rel$relationship)
ori_rel$relationship <- sub("aro ace", "Aromantic", ori_rel$relationship)
ori_rel$relationship <- sub("poly by nature, mono by practice", "Polyamorous", ori_rel$relationship)

ori_rel$relationship[ori_rel$relationship == ""] <- "Omitted"
# rows (sometimes) store several responses
# pull them out and separate so reporting can be counted individually
orientation_list <- strsplit(ori_rel$orientation, ",")
orientation_unlist <- unlist(orientation_list)
ori_occurrences <- table(orientation_unlist)
ori_occurrences_df <- as.data.frame(ori_occurrences)

relationship_list <- strsplit(ori_rel$relationship, ",")
relationship_unlist <- unlist(relationship_list)
rel_occur <- table(relationship_unlist)
rel_occur_df <- as.data.frame(rel_occur)
#plot percent of people
ori_df %>% 
  ggplot(aes(x = Orientation, y = Percent, fill = Orientation)) +
  geom_bar(stat = "identity", color = "black", alpha = 0.9) +
  scale_fill_brewer(palette = "Spectral") + 
  labs(
    x = "Orientation",
    y = "Participant percentage",
    title = "\nSexual orientation reported by individuals",
    caption = "Some participants reported multiple orientations and were thus categorized under Multiple responses."
  ) +
   geom_text(aes(label = sprintf("%.1f%%", Percent)), 
            position = position_stack(vjust = 0.5), 
            size = 3.5, color = "black") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(fill = FALSE)

ori_df_2 <- ori_df  %>% 
  arrange(desc(Percent))

knitr::kable(ori_df_2)
Orientation Count Percent
Straight 310 46.3
Bisexual or pansexual 201 30.0
Queer 67 10.0
Multiple responses 41 6.1
Gay or Lesbian 14 2.1
Omitted 12 1.8
Unsure/Questioning 11 1.6
Asexual 7 1.0
Heteroflexible 6 0.9
Demisexual 1 0.1

Of sexual orientations reported, heterosexual (straight) was the most common at 46.3%, followed by bisexual or pansexual at 30%. Compared to Burning Man, straight was reported in 60.4% of the population, followed by bisexual or pansexual (15%), then heteroflexible (10.3%).

Multiple identities was created to handle the participants who selected multiple responses for their sexual orientation. These individuals made up 6.1% of the SOAK population. Details on which orientations were selected more or less frequently are plotted in the next graph.

According to UCLA’s report (https://williamsinstitute.law.ucla.edu/visualization/lgbt-stats/?topic=LGBT#density), the LGBT population in Oregon is 5.6%. This is the 2nd highest reported state LGBT population, only second to DC. The LGBTQIA+ population is over-represented at SOAK and Burning Man events.

# plot occurrences
ori_occurrences_df %>% 
  ggplot(aes(x = Orientation,
             y = Count,
             fill = Orientation)) +
  geom_bar(stat = "identity",
           color = "black") +
  scale_fill_brewer(palette = "Spectral") +
  labs(x = "Orientation",
       y = "Number of times reported",
       title = "\nHow many times a sexual orientation was reported") +
  geom_text(aes(label = Count), 
            position = position_stack(vjust = 0.5), 
            size = 3.5, color = "black") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(fill = FALSE)

ori_occurdf_2 <- ori_occurrences_df  %>% 
  arrange(desc(Percent))

knitr::kable(ori_occurdf_2, caption = "Percent in table below is calculated by the count divided by the number of participants, not the number of responses")
Percent in table below is calculated by the count divided by the number of participants, not the number of responses
Orientation Count Percent
Straight 324 48.4
Bisexual or pansexual 224 33.4
Queer 91 13.6
Unsure/Questioning 22 3.3
Gay or Lesbian 21 3.1
Asexual 12 1.8
Omitted 12 1.8
Heteroflexible 6 0.9
Demisexual 3 0.4

Much like the previous graph, we see straight is the most commonly reported orientation, followed by bisexual or pansexual, then queer. Since this graph does not combine multiple responses into one category, we see the frequency of how each sexual orientation was reported.

The percent is calculated by the number of participants, not the number of responses. Therefore, the percent total will add to over 100%. This should be read that of all participants, 48.4% could have selected straight, 33.4% bisexual, and so on.

The biggest differences when looking at data this way is that unsure/questioning jumps to being reported in 3.3% of participants, and gay/lesbian is reported in 3.1% of participants.

#plot percent of people
relation_df %>% 
  ggplot(aes(x = Relationship, y = Percent, fill = Relationship)) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_brewer(palette = "Set3") + 
  labs(
    x = "Relationship orientation",
    y = "Participant percentage",
    title = "\nRelationship orientation reported by individuals",
    caption = "Some participants reported multiple relationship orientations and were thus categorized under Multiple responses."
  ) +
   geom_text(aes(label = sprintf("%.1f%%", Percent)), 
            position = position_stack(vjust = 0.5), 
            size = 3.5, color = "black") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(fill = FALSE)

relation_df2 <- relation_df  %>% 
  arrange(desc(Percent))

knitr::kable(relation_df2, caption = "\nRelationship orientation as categorized by a single identifier or multiple responses")
Relationship orientation as categorized by a single identifier or multiple responses
Relationship Count Percent
Monogamous 275 41.0
Polyamorous 151 22.5
Unsure/Questioning 61 9.1
Non-monogamous 60 9.0
Multiple responses 55 8.2
Open relationship 42 6.3
Omitted 24 3.6
Aromantic 1 0.1
Relationship anarchist 1 0.1

When it came to relationship orientation, most participants claimed monogamy at 41%, followed by polyamory at 22.5%, then unsure/questioning at 9.1%. Again, some users selected multiple relationships and were categorized into “Multiple responses” to simplify data analysis and display. These responses are accounted for more specifically in the following graph.

Compared to the Burning Man Census report, monogamy led the relationship orientations at 55.2%, followed by mostly monogamous (not reported at SOAK), then polyamorous at 13.3%.

# plot occurrences
rel_occur_df %>% 
  ggplot(aes(x = Relationship,
             y = Percent,
             fill = Relationship)) +
  geom_bar(stat = "identity",
           color = "black") +
  scale_fill_brewer(palette = "Set3") +
  labs(x = "Relationship orientation",
       y = "Percent relationship was reported",
       title = "\nMonogamous was the most commonly reported relationship orientation") +
  geom_text(aes(label = sprintf("%.1f%%", Percent)), 
            position = position_stack(vjust = 0.5), 
            size = 3.5, color = "black") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(fill = FALSE)

rel_occur_df2 <- rel_occur_df %>% 
  arrange(desc(Percent))

knitr::kable(rel_occur_df2, caption = "The number of times a relationship orientation was reported across all 670 participants, totaling 740 responses")
The number of times a relationship orientation was reported across all 670 participants, totaling 740 responses
Relationship Count Percent
Monogamous 294 43.9
Polyamorous 188 28.1
Non-monogamous 88 13.1
Open relationship 71 10.6
Unsure/Questioning 70 10.4
Omitted 24 3.6
Relationship anarchist 4 0.6
Aromantic 1 0.1

Like the sexual orientation graphs, relationship orientation was analyzed at the individual level then response level. We see frequency of report change the order of relationship orientation.

The percent in this table is calculated over number of responses (740) instead of participant (670). Here we can see the most commonly reported relationship orientation remained monogamous, followed by polyamorous, then non-monogamous. Non-monogamy claimed the 3rd most commonly reported relationship orientation, but was 4th when reported as the sole relationship orientation.

Event Preferences and Practices

Transportation and Lodging

# create df only containing transp and RV data
transpo_rv <- data %>%
  select(arrive, rv)

transpo_rv$arrive[transpo_rv$arrive == ""] <- "Omitted"
transpo_rv$rv[transpo_rv$rv == ""] <- "Omitted"

arrive <- table(transpo_rv$arrive)
arrive <- as.data.frame(arrive)
colnames(arrive) <- c("Arrival", "Count")
arrive$Percent <- round((arrive$Count / sum(arrive$Count) * 100), digits = 1)

rv <- table(transpo_rv$rv)
rv <- as.data.frame(rv)
colnames(rv) <- c("Sleeping", "Count")
rv$Percent <- round((rv$Count / sum(rv$Count) * 100), digits = 1)
#maths for pie chart
pie_labels_transpo <- arrive %>% 
  mutate(csum = rev(cumsum(rev(Percent))),
         pos = Percent/2 + lead(csum, 1),
         pos = if_else(is.na(pos), Percent/2, pos))

#the plot
arrive %>% 
  ggplot(aes(x = "", y = Percent, fill = fct_inorder(Arrival))) +
  geom_col(width = 1, color = 1) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Pastel1") +
  geom_label_repel(data = pie_labels_transpo,
                   aes(y = pos, label = paste0(Percent, "%")),
                   size = 4.5, nudge_x = 1, show.legend = FALSE) +
  guides(fill = guide_legend(title = "Question: How did you arrive?")) +
  ggtitle("\n\nTransportation method: Most participants carpooled") +
  labs(caption = paste(sum(arrive$Count), "participants, including omitted responses \n")) +
  theme_void()

knitr::kable(arrive)
Arrival Count Percent
Air travel then carpooled 6 0.9
Air travel then drove solo 1 0.1
Carpooled with 1-3 others 456 68.1
Carpooled with 10+ others 2 0.3
Carpooled with 4-9 others 16 2.4
Drove solo 179 26.7
Omitted 10 1.5

The majority (71.7%) of participants carpooled to SOAK. This is less than Burning Man, where in 2022 only 17.4% of Burners drove solo, compared to SOAK where 26.8% drove solo (when additionally accounting for those who flew then drove solo).

#maths for pie chart
pie_labels_rv <- rv %>% 
  mutate(csum = rev(cumsum(rev(Percent))),
         pos = Percent/2 + lead(csum, 1),
         pos = if_else(is.na(pos), Percent/2, pos))

#the plot
rv %>% 
  ggplot(aes(x = "", y = Percent, fill = fct_inorder(Sleeping))) +
  geom_col(width = 1, color = 1) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Pastel1") +
  geom_label_repel(data = pie_labels_rv,
                   aes(y = pos, label = paste0(Percent, "%")),
                   size = 4.5, nudge_x = 1, show.legend = FALSE) +
  guides(fill = guide_legend(title = "Question: Will you stay in an RV or camping vehicle?")) +
  ggtitle("\n\n\nSleeping arrangements: Most participants tent it!") +
  labs(caption = paste(sum(rv$Count), "participants, including omitted responses \n")) +
  theme_void()

knitr::kable(rv)
Sleeping Count Percent
No, and I’d like to see less RVs 58 8.7
No, but I’d like to stay in one 96 14.3
No, no opinion on RVs 412 61.5
Omitted 14 2.1
Yes 90 13.4

This question bakes in an opinion in addition to gaining information on how participants sleep at SOAK. Only 13.4% of participants stayed in an RV or camping vehicle. This is much less than Burning Man, where 31.2% of people stayed in an RV. Of those who did not stay in RVs, most did not have an opinion on their presence at SOAK. The desire to see less RVs was reported the least.

Ticket Acquisition

data$ticket[data$ticket == ""] <- "Omitted"

#count occurrences
ticket_counts <- table(data$ticket)

#convert the result to a data frame
tickets <- data.frame(tickets = names(ticket_counts), count = as.numeric(ticket_counts))
colnames(tickets) <- c("Ticket", "Count")
tickets$Percent <- round((tickets$Count / sum(tickets$Count) * 100), digits = 1)
#plot ticket sales
tickets %>% 
  ggplot(aes(x = Ticket, y = Percent, fill = Ticket)) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_brewer(palette = "PRGn") + 
  labs(
    x = "How individuals obtained a ticket",
    y = "Participant percentage",
    title = "\nTicket acquisition method") +
   geom_text(aes(label = sprintf("%.1f%%", Percent)), 
            position = position_stack(vjust = 0.5), 
            size = 3.5, color = "black") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(fill = FALSE)

knitr::kable(tickets)
Ticket Count Percent
Gift ticket (from friend) 31 4.6
Gift ticket (from SOAK org) 17 2.5
Gift ticket (from volunteering) 59 8.8
Omitted 18 2.7
Purchased by friend I repaid 114 17.0
Purchased DGS 162 24.2
Purchased main sale 128 19.1
Purchased OMG sale 53 7.9
Purchased patron sale 24 3.6
Purchased resale 43 6.4
Purchased subsidized 21 3.1

Ticket acquisition for SOAK is mostly done through directed group sales (DGS) at 24.2%, followed by main sale purchase at 19.1%, then paying back a friend who purchased a ticket at 17%. Subsidized tickets (Burning Man’s version of the ticket aid program) accounted for 3.1%.

Compared to the 2022 Burning Man Census report, the most common way to obtain a BM ticket was the main or OMG sale, with 38.9% of participants getting their tickets that way. When adding the OMG sale to main sale for SOAK, they contribute to 26% of ticket acquisition methods. STEP was the second most common method (13.8%), then ticket aid (11.4%).

SOAK Location

#counting occurrences
move_counts <- table(data$move)

#convert the result to a data frame
move <- data.frame(move = names(move_counts), count = as.numeric(move_counts))

colnames(move) <- c("Move", "Count")
move$Percent <- round((move$Count / sum(move$Count) * 100), digits = 1)
#maths for pie chart
pie_labels_move <- move %>% 
  mutate(csum = rev(cumsum(rev(Percent))),
         pos = Percent/2 + lead(csum, 1),
         pos = if_else(is.na(pos), Percent/2, pos))

#the plot
move %>% 
  ggplot(aes(x = "", y = Percent, fill = fct_inorder(Move))) +
  geom_col(width = 1, color = 1) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Paired") +
  geom_label_repel(data = pie_labels_move,
                   aes(y = pos, label = paste0(Percent, "%")),
                   size = 4.5, nudge_x = 1, show.legend = FALSE) +
  guides(fill = guide_legend(title = "Question: Would you attend SOAK if it moved location?")) +
  ggtitle("\nMost participants would attend SOAK if it moved") +
  labs(caption = paste(sum(move$Count), "participants, including omitted responses \n")) +
  theme_void()

knitr::kable(move)
Move Count Percent
Maybe 229 34.2
No 3 0.4
Omitted 6 0.9
Yes 432 64.5

Most participants (64.5%) would follow SOAK if it moved. Cautiously, 34.2% might attend SOAK if it moved. Perhaps location-dependent? Only a small fraction, 0.4%, claimed they would not attend SOAK if it left Justesen Ranch in Tygh Valley.

Participation

data$participate[data$participate == ""] <- "Omitted"

# rows (sometimes) store several responses
# parse them out
participate_list <- strsplit(data$participate, ",")
participate_unlist <- unlist(participate_list)
party_occurrences <- table(participate_unlist)
participate_df <- as.data.frame(party_occurrences)

# volunteering as soak lead, director, or producer were categorized separately due to commas - combine
colnames(participate_df) <- c("Participation", "Count")

# manually replacing value with original full value
participate_df$Participation <- as.character(participate_df$Participation)
participate_df[9,1] <- "Volunteering as SOAK lead, director, or producer"

# add rows that were combined
participate_df[9,2] <- sum(participate_df$Count[c(1, 2, 9)])

# drop rows 1 and 2
participate_df <- participate_df[-c(1, 2), ]

participate_df$Percent <- round(participate_df$Count / 670 * 100, 1)
participate_df$Participation <- factor(participate_df$Participation,
                                       levels = participate_df$Participation[order(participate_df$Count, decreasing = TRUE)])

participate_df %>% 
  ggplot(aes(x = Participation,
             y = Count,
             fill = Participation)) +
  geom_bar(stat = "identity",
           color = "black") +
  scale_fill_brewer(palette = "BrBG") +
  labs(x = "How participants participate",
       y = "Number of times reported",
       title = "\nHow participants planned to participate with SOAK") +
  geom_text(aes(label = Count), 
            position = position_stack(vjust = 0.5), 
            size = 3.5, color = "black") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(fill = FALSE)

participate_df <- participate_df  %>% 
  arrange(desc(Count))

knitr::kable(participate_df, row.names = FALSE)
Participation Count Percent
Bringing theme camp 390 58.2
Interacting with art or theme camps 278 41.5
Volunteering with 1 or more SOAK department 128 19.1
Hosting an event/workshop 84 12.5
Bringing art project 66 9.9
Omitted 30 4.5
Volunteering as SOAK lead, director, or producer 30 4.5
Member of Precipitation NW Board 4 0.6

How did you participate at SOAK in 2023? Participants could select more than one option, therefore the percent here is out of participants and not responses (totaling above 100). Most people helped bring a theme camp! A whopping 58.2% reported bringing a theme camp (which is good, since the highest reported number of ticket sales went to camps). Second to bringing a camp, 41.5% of people reported interacting with art or theme camps.

The least common forms of participation were volunteering as a lead, director, producer, or board member. Volunteerism in Black Rock City claimed 14.6% of Burners, but 19.1% of SOAKers volunteered with 1 or more SOAK department.

Sustainability

data$sustain[data$sustain == ""] <- "Omitted"

# rows (sometimes) store several responses
# parse them out
sustain_list <- strsplit(data$sustain, ",")
sustain_unlist <- unlist(sustain_list)
sustain_occurrences <- table(sustain_unlist)
sustain_df <- as.data.frame(sustain_occurrences)

colnames(sustain_df) <- c("Sustainability", "Count")
sustain_df$Percent <- round(sustain_df$Count / 670 * 100, 1)
sustain_df$Sustainability <- factor(sustain_df$Sustainability,
                                    levels = sustain_df$Sustainability[order(sustain_df$Percent,
                                                                             decreasing = TRUE)])
sustain_df %>% 
  ggplot(aes(x = Sustainability,
             y = Percent,
             fill = Sustainability)) +
  geom_bar(stat = "identity",
           color = "black") +
  scale_fill_brewer(palette = "YlGn") +
  labs(x = "Sustainability method",
       y = "Percent of surveyed individuals",
       title = "\nHow participants were sustainable at SOAK") +
  geom_text(aes(label = sprintf("%.1f%%", Percent)), 
            position = position_stack(vjust = 0.5), 
            size = 3.5, color = "black") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(fill = FALSE)

sustain_df <- sustain_df  %>% 
  arrange(desc(Percent))

knitr::kable(sustain_df, row.names = FALSE)
Sustainability Count Percent
Recycling 536 80.0
Carpooling 429 64.0
Composting 218 32.5
No single-use items 192 28.7
Solar power 172 25.7
Plant-based diet 132 19.7
Omitted 20 3.0
Other 18 2.7

Of the 670 participants surveyed, 80% reported they recycle at SOAK. This is amazing! The next most common way to contribute to sustainability was carpooling at 64%. However, in the question of “how did you arrive at SOAK?”, a total of 71.7% of participants reported carpooling. The discrepancy may be explained by incorrect reporting by participants or user error in data entry.

Inclusion of Minors

# create df only containing bringing kids data
kids <- data %>%
  select(kids, teens)

#replace blanks with Omitted
kids$kids[kids$kids == ""] <- "Omitted"
kids$teens[kids$teens == ""] <- "Omitted"

kids_counts <- table(kids$kids)
teens_counts <- table(kids$teens)

#convert the result to a data frame
kids_2 <- data.frame(kids = names(kids_counts), count = as.numeric(kids_counts))
teens <- data.frame(teens = names(teens_counts), count = as.numeric(teens_counts))

colnames(kids_2) <- c("Kids", "Count")
kids_2$Percent <- round((kids_2$Count / sum(kids_2$Count) * 100), digits = 1)
colnames(teens) <- c("Teens", "Count")
teens$Percent <- round((teens$Count / sum(teens$Count) * 100), digits = 1)
#maths for pie chart
pie_labels_kids <- kids_2 %>% 
  mutate(csum = rev(cumsum(rev(Percent))),
         pos = Percent/2 + lead(csum, 1),
         pos = if_else(is.na(pos), Percent/2, pos))

#the plot
kids_2 %>% 
  ggplot(aes(x = "", y = Percent, fill = fct_inorder(Kids))) +
  geom_col(width = 1, color = 1) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "PiYG") +
  geom_label_repel(data = pie_labels_kids,
                   aes(y = pos, label = paste0(Percent, "%")),
                   size = 4.5, nudge_x = 1, show.legend = FALSE) +
  guides(fill = guide_legend(title = "Question: Did you bring any children 12 or younger?")) +
  ggtitle("\nMost participants did not bring children") +
  labs(caption = paste(sum(kids_2$Count), "participants, including omitted responses \n")) +
  theme_void()

knitr::kable(kids_2)
Kids Count Percent
No 629 93.9
Omitted 13 1.9
Yes 28 4.2
#maths for pie chart
pie_labels_teens <- teens %>% 
  mutate(csum = rev(cumsum(rev(Percent))),
         pos = Percent/2 + lead(csum, 1),
         pos = if_else(is.na(pos), Percent/2, pos))

#the plot
teens %>% 
  ggplot(aes(x = "", y = Percent, fill = fct_inorder(Teens))) +
  geom_col(width = 1, color = 1) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "PRGn") +
  geom_label_repel(data = pie_labels_teens,
                   aes(y = pos, label = paste0(Percent, "%")),
                   size = 4.5, nudge_x = 1, show.legend = FALSE) +
  guides(fill = guide_legend(title = "Question: Did you bring any teenagers?")) +
  ggtitle("\nMost participants did not bring teenagers") +
  labs(caption = paste(sum(teens$Count), "participants, including omitted responses \n")) +
  theme_void()

knitr::kable(teens)
Teens Count Percent
No 636 94.9
Omitted 17 2.5
Yes 17 2.5

Most participants did not bring children nor teenagers to SOAK. The 4.2% of people who brought children and 2.5% who brought teenagers were much higher than those who brought children and teenagers to Burning Man, 1.9% and 0.6% respectively.

Town Hall and Burning Man

#counting occurrences
townhall_counts <- table(data$townhall)

#convert the result to a data frame
townhall <- data.frame(move = names(townhall_counts), count = as.numeric(townhall_counts))

colnames(townhall) <- c("Townhall", "Count")
townhall$Percent <- round((townhall$Count / sum(townhall$Count) * 100), digits = 1)
#maths for pie chart
pie_labels_town <- townhall %>% 
  mutate(csum = rev(cumsum(rev(Percent))),
         pos = Percent/2 + lead(csum, 1),
         pos = if_else(is.na(pos), Percent/2, pos))

#the plot
townhall %>% 
  ggplot(aes(x = "", y = Percent, fill = fct_inorder(Townhall))) +
  geom_col(width = 1, color = 1) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "RdYlBu") +
  geom_label_repel(data = pie_labels_town,
                   aes(y = pos, label = paste0(Percent, "%")),
                   size = 4.5, nudge_x = 1, show.legend = FALSE) +
  guides(fill = guide_legend(title = "Question: Did you attend Town Hall?")) +
  ggtitle("\nMost participants did not attend Town Hall ") +
  labs(caption = paste(sum(townhall$Count), "participants, including omitted responses \n")) +
  theme_void()

knitr::kable(townhall)
Townhall Count Percent
I don’t know what that is 117 17.5
No 499 74.5
Omitted 10 1.5
Yes 44 6.6

Only 6.6% of people attended SOAK’s Town Hall in February, 2023. Most people reported not attending (74.5%), followed by a lack of knowing what Town Hall is (17.5%).

#counting occurrences
bm_counts <- table(data$bm)

#convert the result to a data frame
burning_man <- data.frame(move = names(bm_counts), count = as.numeric(bm_counts))

colnames(burning_man) <- c("BM", "Count")
burning_man$Percent <- round((burning_man$Count / sum(burning_man$Count) * 100), digits = 1)
#maths for pie chart
pie_labels_bm <- burning_man %>% 
  mutate(csum = rev(cumsum(rev(Percent))),
         pos = Percent/2 + lead(csum, 1),
         pos = if_else(is.na(pos), Percent/2, pos))

#the plot
burning_man %>% 
  ggplot(aes(x = "", y = Percent, fill = fct_inorder(BM))) +
  geom_col(width = 1, color = 1) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "BuPu") +
  geom_label_repel(data = pie_labels_bm,
                   aes(y = pos, label = paste0(Percent, "%")),
                   size = 4.5, nudge_x = 1, show.legend = FALSE) +
  guides(fill = guide_legend(title = "Question: Do you plan to go to Burning Man this year?")) +
  ggtitle("\nMost participants did not plan to go to Burning Man ") +
  labs(caption = paste(sum(burning_man$Count), "participants, including omitted responses \n")) +
  theme_void()

knitr::kable(burning_man)
BM Count Percent
No 401 59.9
Omitted 7 1.0
Unsure 114 17.0
Yes 148 22.1

The majority of participants (59.9%) did not plan to go to Burning Man, followed by 22.1% planned to go.

Commuincation

# create df only containing communication data
comm <- data %>%
  select(info, q24_9_text)

comm$q24_9_text <- gsub("Carrier pigeon|Apparating|Search", "Omitted", comm$q24_9_text)

comm <- comm %>%
  mutate(info = str_replace(info, "Other", q24_9_text))

comm$info <- gsub("Friends|Friend|friends|friend|Dad|Husband|mother|parents|Partner|Soak Ranger|coco|emails from theme camp|House Potty Discord|Sound Camp Meeting|Slack|slack|campchat|Producers", 
                  "Word of mouth", comm$info)

# remove the second occurrence of "Word of mouth" since some entries of "Other" also had Word of mouth selected
comm <- comm %>% 
  mutate(info = gsub("Word of mouth,Word of mouth", "Word of mouth", info))
# rows (sometimes) store several responses
# parse them out
comm_list <- strsplit(comm$info, ",")
comm_unlist <- unlist(comm_list)
comm_occurrences <- table(comm_unlist)
comm_df <- as.data.frame(comm_occurrences)

colnames(comm_df) <- c("Information", "Count")
comm_df$Percent <- round(comm_df$Count / sum(comm_df$Count) * 100, 1)
comm_df$Information <- factor(comm_df$Information,
                                    levels = comm_df$Information[order(comm_df$Percent,
                                                                             decreasing = TRUE)])
comm_df %>% 
  ggplot(aes(x = Information,
             y = Count,
             fill = Information)) +
  geom_bar(stat = "identity",
           color = "black") +
  scale_fill_brewer(palette = "Blues") +
  labs(x = "Information method",
       y = "Number of times reported",
       title = "\nHow participants received SOAK related information") +
  geom_text(aes(label = Count), 
            position = position_stack(vjust = 0.5), 
            size = 3.5, color = "black") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  guides(fill = FALSE)

comm_df <- comm_df  %>% 
  arrange(desc(Percent))

knitr::kable(comm_df, row.names = FALSE)
Information Count Percent
SOAK mailing list 339 26.2
Word of mouth 330 25.5
SOAK website 310 24.0
Facebook 211 16.3
Portland-announce list 57 4.4
Omitted 24 1.9
Instagram 15 1.2
Telegram 6 0.5
Twitter 2 0.2

Participants used a few main ways to obtain SOAK related information in 2023. The SOAK mailing list, word of mouth (friends, camps, etc.), and the SOAK website were the top ways to learn about SOAK. Facebook was the 4th most commonly reported information channel.

Participants could select multiple responses for this question. The percent was calculated using the number of responses.

Links to cited data

Burning Man Census: https://blackrockcitycensus.org/

oregon dot gov ^ 1: https://www.oregon.gov/oha/PH/BIRTHDEATHCERTIFICATES/SURVEYS/OREGONHEALTHYTEENS/Documents/2019/Gender/11th/Demog11.pdf

oregon dot gov ^ 2: https://www.oregon.gov/highered/research/Pages/educational-attainment.aspx